* ImeIme Umana 
* 12/26/2016
* nc_evote_analysis.do 
* Program to analyze North Carolina early voting locations 

* NOTES: 
* Addresses geocoded using http://www.findlatitudeandlongitude.com/batch-geocode/
* (Unmatched addresses coded using google maps) 
* Data extracted from PDFs using Tabula (post-processed manually)

* Start from an empty session; clear all also drops previously defined programs,
* so it has to run before the program definition further down
clear all
* Do not pause output at --more-- prompts
set more off 

* Relative paths used below (source/, derived/, tables_figures/) resolve against this directory
cd "~/Downloads/nc_evote_umana"  // Change to local directory


* PART 0:  HAVERSINE.ado
* Program for calculating great circle distances between pairs of (lat, lon) coordinates
*
* Usage:   haversine lat1 lon1 lat2 lon2
*   lat1 lon1 : latitude and longitude of the first point, in degrees
*   lat2 lon2 : latitude and longitude of the second point, in degrees
* Result:  creates a new variable named distance equal to 3961 times the
*          central angle (in radians) between the two points. 3961 is
*          presumably the Earth's radius in miles (the figures built from
*          this variable label it as miles).
* Notes:   - The call fails if a variable named distance already exists, so
*            rename or drop the result before calling the program again.
*          - Every intermediate value and the result are stored as doubles;
*            the default float type would truncate the radians and the
*            squared sines to about 7 significant digits.
*          - The program is declared rclass but returns nothing in r().
capture program drop haversine
program define haversine, rclass
	args lat1 lon1 lat2 lon2
	tempvar dlat dlon tmp_lat1 tmp_lat2 a c
	* Coordinate differences and latitudes converted from degrees to radians
	gen double `dlat' = (`lat2' - `lat1') * _pi / 180
	gen double `dlon' = (`lon2' - `lon1') * _pi / 180
	gen double `tmp_lat1' = `lat1' * _pi / 180
	gen double `tmp_lat2' = `lat2' * _pi / 180
	* Haversine term and the central angle it implies
	gen double `a' = (sin(`dlat'/2))^2 + cos(`tmp_lat1') * cos(`tmp_lat2') * (sin(`dlon'/2))^2
	gen double `c' = 2 * atan2( sqrt(`a'), sqrt(1 - `a') )
	* Scale the angle by the radius constant
	gen double distance = 3961 * `c'
end


* PART 1: Construct data on early voting locations in North Carolina 

* 2016 early voting sites: read the spreadsheet, keep the id, county, site
* name and coordinates, attach county FIPS codes, and save the result
import excel using source/early_voting_locations.xls, firstrow clear
keep OBJECTID County Name latitude longitude
rename *, lower
rename (objectid county) (evote_fid_2016 county_name)
* One site name carries two leading blanks; normalise it
replace name = "CARRBORO TOWN HALL" if name == "  CARRBORO TOWN HALL"
* The FIPS lookup is keyed on county_name
merge m:1 county_name using source/county_fips_codes, nogenerate
rename (county_name county_fips) (county countyfp)
save derived/early_voting_locations_2016, replace

* 2016 early voting schedules: one row per site and date, with flags for
* whether the site is open, whether the date is a Sunday, and whether the
* date falls in the first week
import excel using source/early_voting_schedules.xlsx, firstrow clear
keep County Name Date Time
rename *, lower
replace name = "CARRBORO TOWN HALL" if name == "  CARRBORO TOWN HALL"
* A site counts as open on a date unless its time entry reads "Closed"
generate open    = (time != "Closed")
* td(23oct2016) = 20750 and td(30oct2016) = 20757
generate sunday  = inlist(date, td(23oct2016), td(30oct2016))
* td(27oct2016) = 20754
generate firstwk = (date <= td(27oct2016))
generate open_firstwk = open * firstwk
generate open_sunday  = open * sunday
save derived/early_voting_schedule_2016, replace

* Collapse the schedule in memory to one row per site (counts of open dates,
* overall / first week / Sundays), flag sites with at least one such date,
* and attach the result to the 2016 site coordinates
collapse (sum) open*, by(county name)
rename open* num_open*
foreach period in firstwk sunday {
	generate dum_open_`period' = (num_open_`period' > 0)
}
tempfile sched_by_site
save "`sched_by_site'"

use derived/early_voting_locations_2016, clear
merge 1:1 county name using "`sched_by_site'", nogenerate
* The ev_ prefix marks site-level fields so they can be dropped as a group later
rename (county name latitude longitude) (ev_county ev_name ev_lat ev_lon)
save derived/early_voting_data_2016, replace

* 2012 early voting sites: read the CSV, take the ZIP code from the tail of
* the second address line, map ZIP to county, and attach county names
insheet using source/2012_ev_locations.csv, names clear
generate zip = substr(address_2, -5, 5)
destring zip, replace
keep id name latitude longitude zip
* Keep only sites whose ZIP appears in the crosswalk
merge m:1 zip using source/zip_cty_cw, keep(match) nogenerate
* The last three digits of cty are the county code
generate county_fips = mod(cty, 1000)
merge m:1 county_fips using source/county_fips_codes, nogenerate
rename (id name latitude longitude county_name county_fips) ///
	(evote_fid_2012 ev_name ev_lat ev_lon ev_county countyfp)
keep evote_fid_2012 ev_county ev_name ev_lat ev_lon countyfp
save derived/early_voting_locations_2012, replace


* PART 2: Construct data file for North Carolina Voting Tabulation Districts 
* Read the VTD-level file and standardise variable names
import excel source/nc_data_full.xls, first clear

* Drop fields that are not used below
drop NAME Cent___X Cent__Y XY ORIG_FID LSAD NAMELSAD FUNCSTAT MTFCC NEAR*
ren (INTPTLAT10 INTPTLON10 GEOID10) (cent_lat cent_lon ncvtd)
ren *, lower
* Strip the trailing "10" from the remaining variable names
ren *10 * 
ren (total_popu white__nh_ black__nh_ hispanic n_asian_an native_ame other__nh_) ///
    (tot_pop white_pop black_pop hisp_pop asian_pop native_pop other_pop) 
ren (total_pop2 white_18__ black_18__ hispanic_1 asian_and_ native_am2 other_18__) ///
    (tot_pop_18p white_pop_18p black_pop_18p hisp_pop_18p asian_pop_18p native_pop_18p other_pop_18p) 
order ncvtd
destring cent_lat cent_lon statefp countyfp, replace

* Shares of the 18+ population, and 18+ population per unit of aland (times 1000)
g pct_white = white_pop_18p/tot_pop_18p
g pct_black = black_pop_18p/tot_pop_18p
g pct_hisp  = hisp_pop_18p /tot_pop_18p
g density   = 1000 * tot_pop_18p/aland

* Join early voting location data and select nearest (within-county) polling place.
* joinby pairs every VTD with every site in the same county; the program
* haversine (defined earlier in this file) then adds the variable distance.
* The site id is a secondary sort key so that ties on distance are broken
* the same way on every run; without it the retained site, and the schedule
* dummies that come with it, depend on how sort orders tied observations.
joinby countyfp using derived/early_voting_data_2016
haversine cent_lat cent_lon ev_lat ev_lon
bys  ncvtd (distance evote_fid_2016) : keep if _n == 1
ren distance dist_2016
* Remove the site-level fields so the 2012 join can add its own ev_ variables
drop ev_* 

* Same procedure for the 2012 sites
joinby countyfp using derived/early_voting_locations_2012
haversine cent_lat cent_lon ev_lat ev_lon
bys  ncvtd (distance evote_fid_2012) : keep if _n == 1
ren distance dist_2012
drop ev_* 

* Change in distance to the nearest site (2016 minus 2012) and log density
g diff_dist = dist_2016 - dist_2012
g ln_density = ln(density) 

save derived/nc_vtd_data_full, replace 


* PART 3: Analysis

* Overlay options passed to spopt() in the maps below: early voting sites
* drawn as small lime points, one global per election year
local marker "size(small) fcolor(lime)"
global sp_pt_16 "point(data(derived/early_voting_locations_2016) x(longitude) y(latitude) `marker')"
global sp_pt_12 "point(data(derived/early_voting_locations_2012) x(ev_lon) y(ev_lat) `marker')"

* Maps of VTD characteristics with the 2016 early voting sites overlaid

* Percent Black is put on a 0-100 scale for the map and scaled back afterwards
* NOTE(review): the float multiply/divide round trip may not restore every value exactly
replace pct_black = 100 * pct_black
maptile pct_black, geography(ncvtd) fcolor(Blues) nquantiles(9) ///
	spopt(${sp_pt_16} title("Percent Black and 2016 Early Vote Locations"))
graph export tables_figures/nc_map_black_2016.png, replace
replace pct_black = pct_black / 100

maptile density, geography(ncvtd) fcolor(OrRd) nquantiles(9) ///
	spopt(${sp_pt_16} title("Population Density and 2016 Early Vote Locations"))
graph export tables_figures/nc_map_density_2016.png, replace

* Obama vote share uses fixed cut values instead of quantiles
maptile pctobama, geography(ncvtd) fcolor(RdBu) ///
	cutvalues(.25 .35 .4 .45 .5 .55 .6 .65 .75) ///
	spopt(${sp_pt_16} title("Obama Vote Percentage and 2016 Early Vote Locations"))
graph export tables_figures/nc_map_obama_2016.png, replace

* Maps of VTD characteristics with the 2012 early voting sites overlaid

* Percent Black is put on a 0-100 scale for the map and scaled back afterwards
* NOTE(review): the float multiply/divide round trip may not restore every value exactly
replace pct_black = 100 * pct_black
maptile pct_black, geography(ncvtd) fcolor(Blues) nquantiles(9) ///
	spopt(${sp_pt_12} title("Percent Black and 2012 Early Vote Locations"))
graph export tables_figures/nc_map_black_2012.png, replace
replace pct_black = pct_black / 100

maptile density, geography(ncvtd) fcolor(OrRd) nquantiles(9) ///
	spopt(${sp_pt_12} title("Population Density and 2012 Early Vote Locations"))
graph export tables_figures/nc_map_density_2012.png, replace

* Obama vote share uses fixed cut values instead of quantiles
maptile pctobama, geography(ncvtd) fcolor(RdBu) ///
	cutvalues(.25 .35 .4 .45 .5 .55 .6 .65 .75) ///
	spopt(${sp_pt_12} title("Obama Vote Percentage and 2012 Early Vote Locations"))
graph export tables_figures/nc_map_obama_2012.png, replace


* Table 1: 2016 distance 
* Distance to the nearest 2016 site regressed on each demographic / vote
* share, controlling for log density and weighting by 18+ population.
* Each specification is run without (_nofe) and with (_ctyfe) county
* effects absorbed; the last pair includes all shares together.
eststo clear 
_eststo black_nofe : reg dist_2016 pct_black      ln_density [w=tot_pop_18p]
_eststo black_ctyfe: reg dist_2016 pct_black      ln_density [w=tot_pop_18p], absorb(countyfp)
_eststo hisp_nofe  : reg dist_2016 pct_hisp       ln_density [w=tot_pop_18p]
_eststo hisp_ctyfe : reg dist_2016 pct_hisp       ln_density [w=tot_pop_18p], absorb(countyfp)
_eststo white_nofe : reg dist_2016 pct_white      ln_density [w=tot_pop_18p]
_eststo white_ctyfe: reg dist_2016 pct_white      ln_density [w=tot_pop_18p], absorb(countyfp)
_eststo obama_nofe : reg dist_2016 pctobama       ln_density [w=tot_pop_18p]
_eststo obama_ctyfe: reg dist_2016 pctobama       ln_density [w=tot_pop_18p], absorb(countyfp)
_eststo all_nofe   : reg dist_2016 pct_* pctobama ln_density [w=tot_pop_18p]
_eststo all_ctyfe  : reg dist_2016 pct_* pctobama ln_density [w=tot_pop_18p], absorb(countyfp)
* order() names the regressor that is actually in the models (ln_density);
* the original list named density, which appears in none of them
esttab _all using tables_figures/table1.csv, ///
	replace order(pct_black pct_hisp pct_white pctobama ln_density) se r2
* Convert the CSV to LaTeX without disturbing the analysis data in memory
preserve 
insheet  using tables_figures/table1.csv, comma clear 
dataout , save(tables_figures/table1.tex) tex nohead replace 
restore

* Table 2: 2012 distance + difference 
* Same specifications as the county-effects columns of Table 1, with the
* 2012 distance (_2012) and the 2016-minus-2012 change in distance (_diff)
* as outcomes. All models control for log density, weight by 18+
* population and absorb county effects.
eststo clear 
_eststo black_2012: reg dist_2012 pct_black      ln_density [w=tot_pop_18p], absorb(countyfp)
_eststo hisp_2012 : reg dist_2012 pct_hisp       ln_density [w=tot_pop_18p], absorb(countyfp)
_eststo white_2012: reg dist_2012 pct_white      ln_density [w=tot_pop_18p], absorb(countyfp)
_eststo obama_2012: reg dist_2012 pctobama       ln_density [w=tot_pop_18p], absorb(countyfp)
_eststo all_2012  : reg dist_2012 pct_* pctobama ln_density [w=tot_pop_18p], absorb(countyfp)
_eststo black_diff: reg diff_dist pct_black      ln_density [w=tot_pop_18p], absorb(countyfp)
_eststo hisp_diff : reg diff_dist pct_hisp       ln_density [w=tot_pop_18p], absorb(countyfp)
_eststo white_diff: reg diff_dist pct_white      ln_density [w=tot_pop_18p], absorb(countyfp)
_eststo obama_diff: reg diff_dist pctobama       ln_density [w=tot_pop_18p], absorb(countyfp)
_eststo all_diff  : reg diff_dist pct_* pctobama ln_density [w=tot_pop_18p], absorb(countyfp)
* order() names the regressor that is actually in the models (ln_density);
* the original list named density, which appears in none of them
esttab _all using tables_figures/table2.csv, ///
	replace order(pct_black pct_hisp pct_white pctobama ln_density) se r2
* Convert the CSV to LaTeX without disturbing the analysis data in memory
preserve 
insheet  using tables_figures/table2.csv, comma clear 
dataout , save(tables_figures/table2.tex) tex nohead replace
restore

* Table 3: 2016 schedule 
* Outcomes are the schedule dummies of the nearest 2016 site: open on at
* least one first-week date (_fw) and open on at least one Sunday (_su).
* All models control for log density, weight by 18+ population and absorb
* county effects.
eststo clear 
_eststo black_fw: reg dum_open_firstwk pct_black      ln_density [w=tot_pop_18p], absorb(countyfp)
_eststo hisp_fw : reg dum_open_firstwk pct_hisp       ln_density [w=tot_pop_18p], absorb(countyfp)
_eststo white_fw: reg dum_open_firstwk pct_white      ln_density [w=tot_pop_18p], absorb(countyfp)
_eststo obama_fw: reg dum_open_firstwk pctobama       ln_density [w=tot_pop_18p], absorb(countyfp)
_eststo all_fw  : reg dum_open_firstwk pct_* pctobama ln_density [w=tot_pop_18p], absorb(countyfp)
_eststo black_su: reg dum_open_sunday  pct_black      ln_density [w=tot_pop_18p], absorb(countyfp)
_eststo hisp_su : reg dum_open_sunday  pct_hisp       ln_density [w=tot_pop_18p], absorb(countyfp)
_eststo white_su: reg dum_open_sunday  pct_white      ln_density [w=tot_pop_18p], absorb(countyfp)
_eststo obama_su: reg dum_open_sunday  pctobama       ln_density [w=tot_pop_18p], absorb(countyfp)
_eststo all_su  : reg dum_open_sunday  pct_* pctobama ln_density [w=tot_pop_18p], absorb(countyfp)
* order() names the regressor that is actually in the models (ln_density);
* the original list named density, which appears in none of them
esttab _all using tables_figures/table3.csv, ///
	replace order(pct_black pct_hisp pct_white pctobama ln_density) se r2  
* Convert the CSV to LaTeX without disturbing the analysis data in memory
preserve 
insheet  using tables_figures/table3.csv, comma clear 
dataout , save(tables_figures/table3.tex) tex nohead replace
restore

* Selected binscatters
* Distance in levels: weighted by 18+ population, county effects absorbed

* 2016 distance against log density (no further controls)
binscatter dist_2016 ln_density [w=tot_pop_18p], absorb(countyfp) ///
	title("Distance to Nearest Early Vote Location vs Log Population Density", size(medsmall)) ///
	xtitle("Log Population Density") ytitle("Distance (Miles)")
graph export tables_figures/bin_ln_density_2016.png, replace

* 2016 distance against percent Black, controlling for log density
binscatter dist_2016 pct_black [w=tot_pop_18p], controls(ln_density) absorb(countyfp) ///
	title("Distance to Nearest Early Vote Location vs Percent Black, 2016", size(medsmall)) ///
	xtitle("Percent Black") ytitle("Distance (Miles)")
graph export tables_figures/bin_black_2016.png, replace

* 2012 distance against percent Black, controlling for log density
binscatter dist_2012 pct_black [w=tot_pop_18p], controls(ln_density) absorb(countyfp) ///
	title("Distance to Nearest Early Vote Location vs Percent Black, 2012", size(medsmall)) ///
	xtitle("Percent Black") ytitle("Distance (Miles)")
graph export tables_figures/bin_black_2012.png, replace

* Change in distance (2016 minus 2012) against each share, weighted by 18+
* population with county effects absorbed. The outcome is written out as
* diff_dist rather than the abbreviation diff, so the commands do not depend
* on variable abbreviation being enabled or on diff being unambiguous.
binscatter diff_dist pct_black [w=tot_pop_18p], control(ln_density) absorb(countyfp) ///
	xtit("Percent Black") ytit("Distance (Miles)") ///
	tit("Difference in Distance to Nearest Early Vote Location vs Percent Black", size(medsmall)) 
graph export tables_figures/bin_black_diff.png, replace 

binscatter diff_dist pct_hisp [w=tot_pop_18p], control(ln_density) absorb(countyfp) ///
	xtit("Percent Hispanic") ytit("Distance (Miles)") ///
	tit("Difference in Distance to Nearest Early Vote Location vs Percent Hispanic", size(medsmall)) 
graph export tables_figures/bin_hisp_diff.png, replace 

binscatter diff_dist pct_white [w=tot_pop_18p], control(ln_density) absorb(countyfp) ///
	xtit("Percent White") ytit("Distance (Miles)") ///
	tit("Difference in Distance to Nearest Early Vote Location vs Percent White", size(medsmall)) 
graph export tables_figures/bin_white_diff.png, replace 

* The Obama-share plot additionally controls for the demographic shares
binscatter diff_dist pctobama [w=tot_pop_18p], control(pct_* ln_density) absorb(countyfp) ///
	xtit("Obama Vote Percentage") ytit("Distance (Miles)") ///
	tit("Difference in Distance to Nearest Early Vote Location vs Obama Vote Percentage", size(medsmall)) 
graph export tables_figures/bin_obama_diff.png, replace 
